The goal of this step in the workflow is to visually examine peatland water table elevation (WTE) data at 7 distinct sites in the Marcell Experimental Forest. To do so, we use 2 types of WTE (aka bogwell) data:
Install and load necessary R packages
Set date range of interest
min_year = 2017
max_year = 2021
First, we need to visualize the stripchart data for peatland daily water table elevation (aka bogwell data) at each of the 7 Marcell Experimental Forest sites:
Water table elevation (WTE) is typically measured in either meters or feet above sea level. For this project, we typically used meters above sea level.
More information about peatland and upland water elevation data collected at the Marcell Experimental Forest can be found on the Environmental Data Initiative Repository Portal.
Read in the most up-to-date data from the following Box file
path:
External-MEF_DATA/Hydro/WaterTableElevation/Bogwells/L1
External-MEF_DATA/EDI/peatland_water_table_daily/edi.562.2/data_objects/Clean the data into a tidy format
#set file path to read in data from Box
filepath = "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/Hydro/WaterTableElevation/Bogwells/L1"
#read in the raw data
bogwell <- read_csv(here(filepath, "L1_Peatland_well_daily_history_2021forMia.csv"))
#clean the data
##note: we do not want any E values for the flag column
bogwell_clean <- bogwell %>%
clean_names() %>%
mutate(year = format(as.POSIXct(date, format = '%Y-%m-%d',
tz = "GMT"),
format = '%Y'),
year = as.numeric(year)) %>%
subset(year >= min_year & year <= max_year)
#plot
p_bogwell <- ggplot(data = bogwell_clean) +
geom_line(aes(x = date, y = wte, color = peatland)) +
theme_classic() +
labs(x = "Time",
y = "Water Table Elevation (m above sea level)",
title = paste0("Daily Peatland Water Table Elevation (", min_year, "-", max_year, ")"),
color = "Site"
) +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5,
size = 7))
#static plot
p_bogwell
Subset the bogwell data to include only S1 observations
Plot
#extract only S1 data
bogwell_clean_s1 <- bogwell_clean %>%
subset(peatland == "S1")
#sample size
n <- nrow(bogwell_clean_s1)
#plot
p_bogwell_s1 <- ggplot(data = bogwell_clean_s1) +
geom_line(aes(x = date,
y = wte,
group = 1,
text = paste0("Date: ", date, "\n",
"Water Table Elevation (m): ", wte))) +
theme_classic() +
labs(x = "Time",
y = "Water Table Elevation (m above sea level)",
title = paste0("S1 Daily Peatland Water Table Elevation (", min_year, "-", max_year, ")")
) +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5,
size = 7))
#interactive plot
ggplotly(p_bogwell_s1,
tooltip = "text") %>%
layout(
title = list(text = paste0("S1 Daily Peatland Water Table Elevation (", min_year, "-", max_year, ")",
'<br>',
'<sup>',
#subtitle
paste0("n = ", n),
'</sup>')),
#title size
font = list(size = 14))
Subset the bogwell data to include only S2 observations
Plot
#extract S2 observations
bogwell_clean_s2 <- bogwell_clean %>%
subset(peatland == "S2")
#sample size
n <- nrow(bogwell_clean_s2)
#plot
p_bogwell_s2 <- ggplot(data = bogwell_clean_s2) +
geom_line(aes(x = date,
y = wte,
group = 1,
text = paste0("Date: ", date, "\n",
"Water Table Elevation (m): ", wte))) +
theme_classic() +
labs(x = "Time",
y = "Water Table Elevation (m above sea level)",
title = paste0("S2 Daily Peatland Water Table Elevation (", min_year, "-", max_year, ")")
) +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5,
size = 7))
#interactive plot
ggplotly(p_bogwell_s2,
tooltip = "text") %>%
layout(
title = list(text = paste0("S2 Daily Peatland Water Table Elevation (", min_year, "-", max_year, ")",
'<br>',
'<sup>',
#subtitle
paste0("n = ", n),
'</sup>')),
#title size
font = list(size = 14))
Subset the bogwell data to include only S3 observations
Plot
#extract S3 observations
bogwell_clean_s3 <- bogwell_clean %>%
subset(peatland == "S3")
#sample size
n <- nrow(bogwell_clean_s3)
#plot
p_bogwell_s3 <- ggplot(data = bogwell_clean_s3) +
geom_line(aes(x = date,
y = wte,
group = 1,
text = paste0("Date: ", date, "\n",
"Water Table Elevation (m): ", wte))) +
theme_classic() +
labs(x = "Time",
y = "Water Table Elevation (m above sea level)",
title = paste0("S3 Daily Peatland Water Table Elevation (", min_year, "-", max_year, ")")
) +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5,
size = 7))
#interactive plot
ggplotly(p_bogwell_s3,
tooltip = "text") %>%
layout(
title = list(text = paste0("S3 Daily Peatland Water Table Elevation (", min_year, "-", max_year, ")",
'<br>',
'<sup>',
#subtitle
paste0("n = ", n),
'</sup>')),
#title size
font = list(size = 14))
Subset the bogwell data to include only S4 observations
Plot
#extract S4 observations
bogwell_clean_s4 <- bogwell_clean %>%
subset(peatland == "S4")
#sample size
n <- nrow(bogwell_clean_s4)
#plot
p_bogwell_s4 <- ggplot(data = bogwell_clean_s4) +
geom_line(aes(x = date,
y = wte,
group = 1,
text = paste0("Date: ", date, "\n",
"Water Table Elevation (m): ", wte))) +
theme_classic() +
labs(x = "Time",
y = "Water Table Elevation (m above sea level)",
title = paste0("S4 Daily Peatland Water Table Elevation (", min_year, "-", max_year, ")")
) +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5,
size = 7))
#interactive plot
ggplotly(p_bogwell_s4,
tooltip = "text") %>%
layout(
title = list(text = paste0("S4 Daily Peatland Water Table Elevation (", min_year, "-", max_year, ")",
'<br>',
'<sup>',
#subtitle
paste0("n = ", n),
'</sup>')),
#title size
font = list(size = 14))
Subset the bogwell data to include only S5 observations
Plot
#extract s5 observations
bogwell_clean_s5 <- bogwell_clean %>%
subset(peatland == "S5")
#sample size
n <- nrow(bogwell_clean_s5)
#plot
p_bogwell_s5 <- ggplot(data = bogwell_clean_s5) +
geom_line(aes(x = date,
y = wte,
group = 1,
text = paste0("Date: ", date, "\n",
"Water Table Elevation (m): ", wte))) +
theme_classic() +
labs(x = "Time",
y = "Water Table Elevation (m above sea level)",
title = paste0("S5 Daily Peatland Water Table Elevation (", min_year, "-", max_year, ")")
) +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5,
size = 7))
#interactive plot
ggplotly(p_bogwell_s5,
tooltip = "text") %>%
layout(
title = list(text = paste0("S5 Daily Peatland Water Table Elevation (", min_year, "-", max_year, ")",
'<br>',
'<sup>',
#subtitle
paste0("n = ", n),
'</sup>')),
#title size
font = list(size = 14))
Subset the bogwell data to include only S6 observations
Plot
#extract s6 observations
bogwell_clean_s6 <- bogwell_clean %>%
subset(peatland == "S6")
#sample size
n <- nrow(bogwell_clean_s6)
#plot
p_bogwell_s6 <- ggplot(data = bogwell_clean_s6) +
geom_line(aes(x = date,
y = wte,
group = 1,
text = paste0("Date: ", date, "\n",
"Water Table Elevation (m): ", wte))) +
theme_classic() +
labs(x = "Time",
y = "Water Table Elevation (m above sea level)",
title = paste0("S6 Daily Peatland Water Table Elevation (", min_year, "-", max_year, ")")
) +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5,
size = 7))
#interactive plot
ggplotly(p_bogwell_s6,
tooltip = "text") %>%
layout(
title = list(text = paste0("S6 Daily Peatland Water Table Elevation (", min_year, "-", max_year, ")",
'<br>',
'<sup>',
#subtitle
paste0("n = ", n),
'</sup>')),
#title size
font = list(size = 14))
Subset the bogwell data to include only Bog Lake Fen observations
Plot
#extract boglk observations
bogwell_clean_boglk <- bogwell_clean %>%
subset(peatland == "BOGLK")
#sample size
n <- nrow(bogwell_clean_boglk)
#plot
p_bogwell_boglk <- ggplot(data = bogwell_clean_boglk) +
geom_line(aes(x = date,
y = wte,
group = 1,
text = paste0("Date: ", date, "\n",
"Water Table Elevation (m): ", wte))) +
theme_classic() +
labs(x = "Time",
y = "Water Table Elevation (m above sea level)",
title = paste0("BOGLK Daily Peatland Water Table Elevation (", min_year, "-", max_year, ")")
) +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5,
size = 7))
#interactive plot
ggplotly(p_bogwell_boglk,
tooltip = "text") %>%
layout(
title = list(text = paste0("BOGLK Daily Peatland Water Table Elevation (", min_year, "-", max_year, ")",
'<br>',
'<sup>',
#subtitle
paste0("n = ", n),
'</sup>')),
#title size
font = list(size = 14))
Shaft encoder bogwell data comes from 2 main sources:
RealTimeData Box directoryannual_appended_logger_files Box directoryreadCDL = function(file){
# read data file starting on 5th line
dat <- read.csv(file, sep=",",header=FALSE,skip=4,na.strings="NAN",stringsAsFactors=F)
# Read in just the header line (l2)
# unlist the line, and remove quotes
h <- readLines(file, n=2)[2]
n <- as.factor(unlist(strsplit(h, ",")) )
n2 <- gsub('"', "", n)
# assign column names to dataframe
colnames(dat) = n2
return(dat)
}
MetersPerFoot = 0.3048
FeetPerMeter = 3.28048
Read in the data from the following Box file path:
External-MEF_DATA/DataDump/RealTimeData
Clean the data into a tidy format
#set file path to read in data from Box
filepath = "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/DataDump/RealTimeData"
#read in the raw data
##note: this data is only 2022
s1_se <- readCDL(file = here(filepath, "S1-EM3_Table1.dat"))
#clean the data
s1_se_clean <- s1_se %>%
clean_names() %>%
#select relevant columns
select(timestamp,
wt_elev) %>%
#rename columns
rename(datetime = timestamp,
wt_elev_ft = wt_elev) %>%
#reformat columns into appropriate classes
mutate(datetime = as.POSIXct(datetime, format = '%Y-%m-%d %H:%M:%S',
tz = "GMT"),
wt_elev_ft = as.numeric(wt_elev_ft),
#calculate WTE in meters
wt_elev_m = wt_elev_ft * MetersPerFoot)
External-MEF_DATA/DataDump/annual_appended_logger_files/2019#set file path to read in data from Box
filepath = "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/DataDump/annual_appended_logger_files"
#set the year
year = "2019"
#set the file name
file = "S1-EM3_Table1.dat"
#read in 2019 data
s1_2019 <- readCDL(file = here(filepath, year, file))
#clean 2019 data
s1_2019_clean <- s1_2019 %>%
clean_names() %>%
#select relevant columns
select(timestamp,
wt_elev) %>%
#rename columns
rename(datetime = timestamp,
wt_elev_ft = wt_elev) %>%
#reformat columns into appropriate classes
mutate(datetime = as.POSIXct(datetime, format = '%Y-%m-%d %H:%M:%S',
tz = "GMT"),
wt_elev_ft = as.numeric(wt_elev_ft),
#calculate WTE in meters
wt_elev_m = wt_elev_ft * MetersPerFoot)
External-MEF_DATA/DataDump/annual_appended_logger_files/2020#set file path to read in data from Box
filepath = "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/DataDump/annual_appended_logger_files"
#set the year
year = "2020"
#set the file name
file = "S1-EM3_Table1.dat"
#read in 2020 data
#note: this data is 2020 - 2022
s1_2020 <- readCDL(file = here(filepath, year, file))
#clean the 2020 data
s1_2020_clean <- s1_2020 %>%
clean_names() %>%
#select relevant columns
select(timestamp,
wt_elev) %>%
#rename columns
rename(datetime = timestamp,
wt_elev_ft = wt_elev) %>%
#reformat columns into appropriate classes
mutate(datetime = as.POSIXct(datetime, format = '%Y-%m-%d %H:%M:%S',
tz = "GMT"),
wt_elev_ft = as.numeric(wt_elev_ft),
#calculate WTE in m
wt_elev_m = wt_elev_ft * MetersPerFoot)
RealTimeData folder
and annual_appended_logger_files folder in Boxs1_all <- rbind(s1_se_clean, s1_2019_clean, s1_2020_clean) %>%
#remove NA values
filter(!is.na(wt_elev_m)) %>%
#extract year
mutate(year = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S',
tz = "GMT"),
format = '%Y'),
year = as.numeric(year)) %>%
#remove duplicate entries
unique()
#set min and max year
min_year = min(s1_all$year)
max_year = max(s1_all$year)
#sample size
n <- nrow(s1_all)
#plot
p_s1 <- ggplot(data = s1_all) +
geom_line(aes(x = datetime,
y = wt_elev_m,
group = 1,
text = paste0("Date: ", datetime, "\n",
"Water Table Elevation (m): ", round(wt_elev_m, digits = 3)))) +
theme_classic() +
labs(x = "Time",
y = "Water Table Elevation (m above sea level)",
title = paste0("S1 Shaft Encoder Water Table Elevation (", min_year, "-", max_year, ")")
) +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5,
size = 7))
#interactive plot
ggplotly(p_s1,
tooltip = "text") %>%
layout(
title = list(text = paste0("S1 Shaft Encoder Water Table Elevation (", min_year, "-", max_year, ")",
'<br>',
'<sup>',
#subtitle
paste0("n = ", n),
'</sup>')),
#title size
font = list(size = 14))
External-MEF_DATA/DataDump/RealTimeData
MaxWTElev column#set file path to read in data from Box
filepath = "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/DataDump/RealTimeData"
#read in the raw data
s2_se <- readCDL(file = here(filepath, "S2_bogwell_S2BW_Daily.dat"))
#clean the data
s2_se_clean <- s2_se %>%
clean_names() %>%
#select relevant columns
select(timestamp,
max_wt_elev) %>%
#rename columns
rename(datetime = timestamp,
max_wt_elev_ft = max_wt_elev) %>%
#reformat columns into appropriate classes
mutate(datetime = as.POSIXct(datetime, format = '%Y-%m-%d %H:%M:%S',
tz = "GMT"),
max_wt_elev_ft = as.numeric(max_wt_elev_ft),
#calculate WTE in m
max_wt_elev_m = max_wt_elev_ft * MetersPerFoot)
External-MEF_DATA/DataDump/annual_appended_logger_files/2019#set file path to read in data from Box
filepath = "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/DataDump/annual_appended_logger_files"
#set the year
year = "2019"
#set the file name
file = "S2bog_S2BW_Daily.dat"
#read in 2019 data
s2_2019 <- readCDL(file = here(filepath, year, file))
#clean 2019 data
s2_2019_clean <- s2_2019 %>%
clean_names() %>%
#select relevant columns
select(timestamp,
max_wt_elev,
max_wt_elev_m) %>%
#rename columns
rename(datetime = timestamp,
max_wt_elev_ft = max_wt_elev) %>%
#reformat columns into appropriate classes
mutate(datetime = as.POSIXct(datetime, format = '%Y-%m-%d %H:%M:%S',
tz = "GMT"),
max_wt_elev_ft = as.numeric(max_wt_elev_ft),
#calculate WTE in meters
max_wt_elev_m = as.numeric(max_wt_elev_m))
External-MEF_DATA/DataDump/annual_appended_logger_files/2020#set file path to read in data from Box
filepath = "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/DataDump/annual_appended_logger_files"
#set the year
year = "2020"
#set the file name
file = "S2bog_S2BW_Daily.dat"
#read in 2020 data
s2_2020 <- readCDL(file = here(filepath, year, file))
#clean 2020 data
s2_2020_clean <- s2_2020 %>%
clean_names() %>%
#select relevant columns
select(timestamp,
max_wt_elev,
max_wt_elev_m) %>%
#rename columns
rename(datetime = timestamp,
max_wt_elev_ft = max_wt_elev) %>%
#reformat columns into appropriate classes
mutate(datetime = as.POSIXct(datetime, format = '%Y-%m-%d %H:%M:%S',
tz = "GMT"),
max_wt_elev_ft = as.numeric(max_wt_elev_ft),
#calculate WTE in meters
max_wt_elev_m = as.numeric(max_wt_elev_m))
RealTimeData folder
and annual_appended_logger_files folder in Boxs2_all <- rbind(s2_se_clean, s2_2019_clean, s2_2020_clean) %>%
#remove NA values
filter(!is.na(max_wt_elev_m)) %>%
#remove rows where WTE = 0
subset(max_wt_elev_m > 0) %>%
#extract year
mutate(year = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S',
tz = "GMT"),
format = '%Y'),
year = as.numeric(year)) %>%
#remove duplicate entries
unique()
#set min and max year
min_year = min(s2_all$year)
max_year = max(s2_all$year)
#sample size
n <- nrow(s2_all)
#plot
p_s2 <- ggplot(data = s2_all) +
geom_line(aes(x = datetime,
y = max_wt_elev_m,
group = 1,
text = paste0("Date: ", datetime, "\n",
"Water Table Elevation (m): ", round(max_wt_elev_m, digits = 3)))) +
theme_classic() +
labs(x = "Time",
y = "Water Table Elevation (m above sea level)",
title = paste0("S2 Shaft Encoder Water Table Elevation (", min_year, "-", max_year, ")")
) +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5,
size = 7))
#interactive plot
ggplotly(p_s2,
tooltip = "text") %>%
layout(
title = list(text = paste0("S2 Shaft Encoder Water Table Elevation (", min_year, "-", max_year, ")",
'<br>',
'<sup>',
#subtitle
paste0("n = ", n),
'</sup>')),
#title size
font = list(size = 14))
S3 was omitted due to unreliable data.
External-MEF_DATA/DataDump/annual_appended_logger_files/2020#set file path to read in data from Box
filepath = "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/DataDump/annual_appended_logger_files"
year = "2020"
file = "S4_bogwell_S4BW_Daily.dat"
#read in 2020 data
s4_2020 <- readCDL(file = here(filepath, year, file))
#clean the data
s4_2020_clean <- s4_2020 %>%
clean_names() %>%
#select relevant columns
select(timestamp,
max_wt_elev,
max_wt_elev_m) %>%
#rename columns
rename(datetime = timestamp,
max_wt_elev_ft = max_wt_elev) %>%
#reformat columns into appropriate classes
mutate(datetime = as.POSIXct(datetime, format = '%Y-%m-%d %H:%M:%S',
tz = "GMT"),
max_wt_elev_ft = as.numeric(max_wt_elev_ft),
max_wt_elev_m = as.numeric(max_wt_elev_m),
date = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S',
tz = "GMT"),
format = '%Y-%m-%d'),
date = as.POSIXct(date, tz = "GMT"),
year = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S',
tz = "GMT"),
format = '%Y'),
year = as.numeric(year))
#remove unreliable values
s4_2020_clean$max_wt_elev_m[which(s4_2020_clean$max_wt_elev_m > 428.75)] <- NA
#set min and max year
min_year = min(s4_2020_clean$year)
max_year = max(s4_2020_clean$year)
#sample size
n <- nrow(s4_2020_clean)
#plot
p_s4 <- ggplot(data = s4_2020_clean) +
geom_line(aes(x = datetime,
y = max_wt_elev_m,
group = 1,
text = paste0("Date: ", date, "\n",
"Water Table Elevation (m): ", max_wt_elev_m))) +
theme_classic() +
labs(x = "Time",
y = "Water Table Elevation (m)",
title = paste0("S4 Bogwell Water Table Elevation (", min_year, "-", max_year, ")")
) +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
#interactive plot
ggplotly(p_s4,
tooltip = "text") %>%
layout(
title = list(text = paste0("S4 Shaft Encoder Water Table Elevation (", min_year, "-", max_year, ")",
'<br>',
'<sup>',
#subtitle
paste0("n = ", n),
'</sup>')),
#title size
font = list(size = 14))
External-MEF_DATA/DataDump/annual_appended_logger_files/2020#set file path to read in data from Box
filepath = "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/DataDump/annual_appended_logger_files"
year = "2020"
file = "S5_bogwell_S5BW_Daily.dat"
#read in 2020-2022 data
s5_2020 <- readCDL(file = here(filepath, year, file))
#clean the data
s5_2020_clean <- s5_2020 %>%
clean_names() %>%
#select relevant columns
select(timestamp,
max_wt_elev,
max_wt_elev_m) %>%
#rename columns
rename(datetime = timestamp,
max_wt_elev_ft = max_wt_elev) %>%
#reformat columns into appropriate classes
mutate(datetime = as.POSIXct(datetime, format = '%Y-%m-%d %H:%M:%S',
tz = "GMT"),
max_wt_elev_ft = as.numeric(max_wt_elev_ft),
max_wt_elev_m = as.numeric(max_wt_elev_m),
date = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S',
tz = "GMT"),
format = '%Y-%m-%d'),
date = as.POSIXct(date, tz = "GMT"),
year = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S',
tz = "GMT"),
format = '%Y'),
year = as.numeric(year))
Read in the 2022 data from the following Box file path:
External-MEF_DATA/DataDump/RealTimeData
Clean the 2022 data into a tidy format
#set file path to read in data from Box
filepath = "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/DataDump/RealTimeData"
#read in the raw data
##note: this data is 2021 - 2022
s5_se <- readCDL(file = here(filepath, "S5_bogwell_S5BW_Daily.dat"))
#clean the data
s5_se_clean <- s5_se %>%
clean_names() %>%
#select relevant columns
select(timestamp,
max_wt_elev,
max_wt_elev_m) %>%
#rename columns
rename(datetime = timestamp,
max_wt_elev_ft = max_wt_elev) %>%
#reformat columns into appropriate classes
mutate(datetime = as.POSIXct(datetime, format = '%Y-%m-%d %H:%M:%S',
tz = "GMT"),
max_wt_elev_ft = as.numeric(max_wt_elev_ft),
max_wt_elev_m = as.numeric(max_wt_elev_m),
date = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S',
tz = "GMT"),
format = '%Y-%m-%d'),
date = as.POSIXct(date, tz = "GMT"),
year = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S',
tz = "GMT"),
format = '%Y'),
year = as.numeric(year))
RealTimeData folder
and annual_appnded_logger_files folder in Boxs5_all <- rbind(s5_2020_clean, s5_se_clean) %>%
#remove NA values
filter(!is.na(max_wt_elev_m)) %>%
#remove duplicate observations
distinct()
#remove unreliable values
s5_all$max_wt_elev_m[which(s5_all$max_wt_elev_m > 424)] <- NA
s5_all$max_wt_elev_m[which(s5_all$date == as.POSIXct("2022-01-31", tz = "GMT"))] <- NA
s5_all$max_wt_elev_m[which(s5_all$date == as.POSIXct("2022-03-02", tz = "GMT"))] <- NA
s5_all$max_wt_elev_m[which(s5_all$date == as.POSIXct("2022-03-06", tz = "GMT"))] <- NA
s5_all$max_wt_elev_m[which(s5_all$date == as.POSIXct("2022-03-15", tz = "GMT"))] <- NA
#set min and max year
min_year = min(s5_se_clean$year)
max_year = max(s5_se_clean$year)
#sample size
n <- nrow(s5_all)
#plot
p_s5 <- ggplot(data = s5_all) +
geom_line(aes(x = datetime,
y = max_wt_elev_m,
group = 1,
text = paste0("Date: ", date, "\n",
"Water Table Elevation (m): ", max_wt_elev_m))) +
theme_classic() +
labs(x = "Time",
y = "Water Table Elevation (m)",
title = paste0("S5 Bogwell Water Table Elevation (", min_year, "-", max_year, ")")
) +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
#interactive plot
ggplotly(p_s5,
tooltip = "text") %>%
layout(
title = list(text = paste0("S5 Shaft Encoder Water Table Elevation (", min_year, "-", max_year, ")",
'<br>',
'<sup>',
#subtitle
paste0("n = ", n),
'</sup>')),
#title size
font = list(size = 14))
Read in the 2022 data from the following Box file path:
External-MEF_DATA/DataDump/RealTimeData
Clean the 2022 data into a tidy format
#set file path to read in data from Box
filepath = "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/DataDump/RealTimeData"
#read in the raw data
##note: this data is 2021 - 2022
s6_se <- readCDL(file = here(filepath, "S6_bogwell_S6BW_Daily.dat"))
#clean the data
s6_se_clean <- s6_se %>%
clean_names() %>%
#select relevant columns
select(timestamp,
max_wt_elev,
max_wt_elev_m) %>%
#rename columns
rename(datetime = timestamp,
max_wt_elev_ft = max_wt_elev) %>%
#reformat columns into appropriate classes
mutate(datetime = as.POSIXct(datetime, format = '%Y-%m-%d %H:%M:%S',
tz = "GMT"),
max_wt_elev_ft = as.numeric(max_wt_elev_ft),
max_wt_elev_m = as.numeric(max_wt_elev_m),
date = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S',
tz = "GMT"),
format = '%Y-%m-%d'),
date = as.POSIXct(date, tz = "GMT"),
year = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S',
tz = "GMT"),
format = '%Y'),
year = as.numeric(year))
External-MEF_DATA/DataDump/annual_appended_logger_files/2019#set file path to read in data from Box
filepath = "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/DataDump/annual_appended_logger_files"
year = "2019"
file = "S6_bogwell_S6BW_Daily.dat"
#read in 2019 data
s6_2019 <- readCDL(file = here(filepath, year, file))
#clean 2019 data
s6_2019_clean <- s6_2019 %>%
clean_names() %>%
#select relevant columns
select(timestamp,
max_wt_elev,
max_wt_elev_m) %>%
#rename columns
rename(datetime = timestamp,
max_wt_elev_ft = max_wt_elev) %>%
#reformat columns into appropriate classes
mutate(datetime = as.POSIXct(datetime, format = '%Y-%m-%d %H:%M:%S',
tz = "GMT"),
date = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S',
tz = "GMT"),
format = '%Y-%m-%d'),
date = as.POSIXct(date, tz = "GMT"),
year = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S',
tz = "GMT"),
format = '%Y'),
year = as.numeric(year),
max_wt_elev_ft = as.numeric(max_wt_elev_ft),
max_wt_elev_m = as.numeric(max_wt_elev_m))
External-MEF_DATA/DataDump/annual_appended_logger_files/2020#set file path to read in data from Box
filepath = "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/DataDump/annual_appended_logger_files"
year = "2020"
file = "S6_bogwell_S6BW_Daily.dat"
#read in 2020-2022 data
s6_2020 <- readCDL(file = here(filepath, year, file))
#clean 2020-2022 data
s6_2020_clean <- s6_2020 %>%
clean_names() %>%
#select relevant columns
select(timestamp,
max_wt_elev,
max_wt_elev_m) %>%
#rename columns
rename(datetime = timestamp,
max_wt_elev_ft = max_wt_elev) %>%
#reformat columns into appropriate classes
mutate(datetime = as.POSIXct(datetime, format = '%Y-%m-%d %H:%M:%S',
tz = "GMT"),
date = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S',
tz = "GMT"),
format = '%Y-%m-%d'),
date = as.POSIXct(date, tz = "GMT"),
year = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S',
tz = "GMT"),
format = '%Y'),
year = as.numeric(year),
max_wt_elev_ft = as.numeric(max_wt_elev_ft),
max_wt_elev_m = as.numeric(max_wt_elev_m))
RealTimeData folder
and annual_appnded_logger_files folder in Boxs6_all <- rbind(s6_se_clean, s6_2019_clean, s6_2020_clean) %>%
#remove one row where WTE = 0
subset(max_wt_elev_m > 0) %>%
#remove NA values
filter(!is.na(max_wt_elev_m)) %>%
#remove duplicate observations
unique()
#set min and max year
min_year = min(s6_all$year)
max_year = max(s6_all$year)
#sample size
n <- nrow(s6_all)
#plot
p_s6 <- ggplot(data = s6_all) +
geom_line(aes(x = datetime,
y = max_wt_elev_m,
group = 1,
text = paste0("Date: ", date, "\n",
"Max Water Table Elevation (m): ", round(max_wt_elev_m, digits = 3)))) +
theme_classic() +
labs(x = "Time",
y = "Water Table Elevation (m above sea level)",
title = paste0("S6 Shaft Encoder Water Table Elevation (", min_year, "-", max_year, ")")
) +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5,
size = 7))
#interactive plot
ggplotly(p_s6,
tooltip = "text") %>%
layout(
title = list(text = paste0("S6 Shaft Encoder Water Table Elevation (", min_year, "-", max_year, ")",
'<br>',
'<sup>',
#subtitle
paste0("n = ", n),
'</sup>')),
#title size
font = list(size = 14))
Read in the 2020-2022 data from the following Box file path:
External-MEF_DATA/DataDump/RealTimeData
Clean the 2020-2022 data into a tidy format
#set file path to read in data from Box
filepath = "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/DataDump/RealTimeData"
#read in the raw shaft encoder data
boglk_se <- readCDL(file = here(filepath, "BLF_met_BogLakeW.dat"))
#clean the data
boglk_se_clean <- boglk_se %>%
clean_names() %>%
#select relevant columns
select(timestamp,
wt_elev) %>%
#rename columns
rename(datetime = timestamp,
max_wt_elev_ft = wt_elev) %>%
#reformat columns into appropriate classes
mutate(datetime = as.POSIXct(datetime, format = '%Y-%m-%d %H:%M:%S',
tz = "GMT"),
max_wt_elev_ft = as.numeric(max_wt_elev_ft),
max_wt_elev_m = max_wt_elev_ft * MetersPerFoot,
max_wt_elev_m = as.numeric(max_wt_elev_m),
date = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S',
tz = "GMT"),
format = '%Y-%m-%d'),
date = as.POSIXct(date, tz = "GMT"),
year = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S',
tz = "GMT"),
format = '%Y'),
year = as.numeric(year))
External-MEF_DATA/DataDump/annual_appende_logger_files/2019#set file path to read in data from Box
filepath = "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/DataDump/annual_appended_logger_files"
year = "2019"
file = "BLF_met_BogLakeW.dat"
#read in 2019 data
boglk_2019 <- readCDL(file = here(filepath, year, file))
#clean 2019 data
boglk_2019_clean <- boglk_2019 %>%
clean_names() %>%
#select relevant columns
select(timestamp,
wt_elev) %>%
#rename columns
rename(datetime = timestamp,
max_wt_elev_ft = wt_elev) %>%
#reformat columns into appropriate classes
mutate(datetime = as.POSIXct(datetime, format = '%Y-%m-%d %H:%M:%S',
tz = "GMT"),
date = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S',
tz = "GMT"),
format = '%Y-%m-%d'),
date = as.POSIXct(date, tz = "GMT"),
year = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S',
tz = "GMT"),
format = '%Y'),
year = as.numeric(year),
max_wt_elev_ft = as.numeric(max_wt_elev_ft),
max_wt_elev_m = max_wt_elev_ft * MetersPerFoot,
max_wt_elev_m = as.numeric(max_wt_elev_m))
External-MEF_DATA/DataDump/annual_appende_logger_files/2020#set file path to read in data from Box
filepath = "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/DataDump/annual_appended_logger_files"
year = "2020"
file = "BLF_met_BogLakeW.dat"
#read in 2020 data
boglk_2020 <- readCDL(file = here(filepath, year, file))
#clean 2020 data
boglk_2020_clean <- boglk_2020 %>%
clean_names() %>%
#select relevant columns
select(timestamp,
wt_elev) %>%
#rename columns
rename(datetime = timestamp,
max_wt_elev_ft = wt_elev) %>%
#reformat columns into appropriate classes
mutate(datetime = as.POSIXct(datetime, format = '%Y-%m-%d %H:%M:%S',
tz = "GMT"),
date = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S',
tz = "GMT"),
format = '%Y-%m-%d'),
date = as.POSIXct(date, tz = "GMT"),
year = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S',
tz = "GMT"),
format = '%Y'),
year = as.numeric(year),
max_wt_elev_ft = as.numeric(max_wt_elev_ft),
max_wt_elev_m = max_wt_elev_ft * MetersPerFoot,
max_wt_elev_m = as.numeric(max_wt_elev_m))
RealTimeData
folder and annual_appnded_logger_files folder in Boxboglk_all <- rbind(boglk_se_clean, boglk_2019_clean, boglk_2020_clean) %>%
#remove NA values
filter(!is.na(max_wt_elev_m)) %>%
#remove duplicate observations
unique()
#set min and max year
min_year = min(boglk_all$year)
max_year = max(boglk_all$year)
#sample size
n <- nrow(boglk_all)
#plot
p_boglk <- ggplot(data = boglk_all) +
geom_line(aes(x = datetime,
y = max_wt_elev_m,
group = 1,
text = paste0("Date: ", date, "\n",
"Max Water Table Elevation (m): ", round(max_wt_elev_m, digits = 3)))) +
theme_classic() +
labs(x = "Time",
y = "Water Table Elevation (m above sea level)",
title = paste0("Bog Lake Fen (BOGLK) Shaft Encoder \n Water Table Elevation (", min_year, "-", max_year, ")")
) +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5,
size = 7))
#interactive plot
ggplotly(p_boglk,
tooltip = "text") %>%
layout(
title = list(text = paste0("BOGLK Shaft Encoder Water Table Elevation (", min_year, "-", max_year, ")",
'<br>',
'<sup>',
#subtitle
paste0("n = ", n),
'</sup>')),
#title size
font = list(size = 14))
Next, we want to compare manual stripchart streamflow values and shaft encoder streamflow values at each site during the same time period in order to assess the similarities between the two types of data collection. Ultimately, we hope the shaft encoders are collecting very similar readings to the stripcharts in order to facilitate the transition from the old stripchart data collection technique to the new shaft encoder data collection technique.
To make these comparisons, we use 1:1 scatterplots with manual stripchart data plotted on the X axis and shaft encoder data plotted on the Y axis. We will also plot a y = x line on the plot to help visually assess if the shaft encoder values mimic the stripchart values. If they do, the points will fall along the y = x line.
Lastly, at each site, we will also calculate the differences between stripchart and shaft encoder stramflow values as well as the standard deviation of the differences.
Format the data
#format stripchart data
bog1 <- bogwell_clean_s1 %>%
#remove unnecessary columns
select(-peatland, -flag) %>%
#create a column to note what type of data this is
mutate(type = "stripchart")
#format shaft encoder data
se1 <- s1_all %>%
#extract date and year
mutate(date = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S',
tz = "GMT"),
format = '%Y-%m-%d'),
date = as.POSIXct(date, tz = "GMT"),
year = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S',
tz = "GMT"),
format = '%Y'),
year = as.numeric(year),
#note data type
type = "shaft_encoder") %>%
#rename columns to join the data
rename(wte = wt_elev_m) %>%
group_by(date) %>%
#calculate max WTE per day
mutate(max_wte = max(wte)) %>%
#remove unnecessary columns
select(-datetime, -wt_elev_ft, -wte) %>%
distinct() %>%
#rename columns to join the data
rename(wte = max_wte)
#join the data
s1_join <- full_join(x = bog1,
y = se1,
by = c("wte", "date", "year", "type"))
#subset the join to the time period where the two data sources overlap
s1_join_sub <- s1_join %>%
pivot_wider(names_from = "type",
values_from = "wte") %>%
#remove rows with NA values
filter(!is.na(shaft_encoder),
!is.na(stripchart))
#set min and max year
min_year = min(s1_join_sub$year)
max_year = max(s1_join_sub$year)
#calculate the number of samples
s1_n <- nrow(s1_join_sub)
p1 <- ggplot(data = s1_join_sub) +
geom_point(aes(x = stripchart,
y = shaft_encoder,
alpha = 0.1,
group = 1,
text = paste0("Date: ", date, "\n",
"Stripchart: ", stripchart, "\n",
"Shaft Encoder: ", round(shaft_encoder, digits = 3)))) +
#plot straight line to compare point spread
geom_abline(intercept = 0,
slope = 1) +
theme_classic() +
labs(x = "Manual Stripchart",
y = "Shaft Encoder"
) +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5,
size = 7),
legend.position = "none")
#interactive plot
ggplotly(p = p1,
#set hovering text
tooltip = "text") %>%
layout(
title = list(text = paste0(paste0("S1 Daily Max WTE Comparison (", min_year, "-", max_year, ")"),
'<br>',
'<sup>',
#subtitle
paste0("n = ", s1_n),
'</sup>')),
#title size
font = list(size = 12))
summary()s1_diff <- s1_join_sub %>%
mutate(diff = shaft_encoder - stripchart)
summary(s1_diff$diff)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.17488 -0.01869 0.01519 0.09393 0.04801 1.54165
s1_sd <- round(sd(s1_diff$diff), digits = 5)
The standard deviation of the differences between stripchart and shaft encoder values is approximately 0.35164.
#set min and max year
min_year = min(s1_diff$year)
max_year = max(s1_diff$year)
#calculate the number of samples
s1_n <- nrow(s1_diff)
#plot
ggplot(data = s1_diff) +
geom_histogram(aes(x = diff),
binwidth = 0.002) +
theme_classic() +
labs(x = "Differences Between Shaft Encoders - Stripcharts (m)",
y = "Frequency",
title = paste0("S1 Daily Max WTE Differences (", min_year, "-", max_year, ")"),
caption = paste0("n = ", s1_n)
) +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
#format stripchart data
bog2 <- bogwell_clean_s2 %>%
#remove unnecessary columns
select(-peatland, -flag) %>%
#create a column to note what type of data this is
mutate(type = "stripchart")
#format shaft encoder data
se2 <- s2_se_clean %>%
#extract date and year
mutate(date = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S',
tz = "GMT"),
format = '%Y-%m-%d'),
date = as.POSIXct(date, tz = "GMT"),
year = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S',
tz = "GMT"),
format = '%Y'),
year = as.numeric(year),
#note data type
type = "shaft_encoder") %>%
#remove unnecessary columns
select(-datetime, -max_wt_elev_ft) %>%
#rename columns to join the data
rename(wte = max_wt_elev_m)
#join the data
s2_join <- full_join(x = bog2,
y = se2,
by = c("wte", "date", "year", "type"))
#subset the join to the time period where the two data sources overlap
s2_join_sub <- s2_join %>%
pivot_wider(names_from = "type",
values_from = "wte") %>%
#remove rows with NA values
filter(!is.na(shaft_encoder),
!is.na(stripchart))
#set min and max year
min_year = min(s2_join_sub$year)
max_year = max(s2_join_sub$year)
#calculate the number of samples
s2_n <- nrow(s2_join_sub)
p2 <- ggplot(data = s2_join_sub) +
geom_point(aes(x = stripchart,
y = shaft_encoder,
alpha = 0.1,
group = 1,
text = paste0("Date: ", date, "\n",
"Stripchart: ", stripchart, "\n",
"Shaft Encoder: ", round(shaft_encoder, digits = 3)))) +
#plot straight line to compare point spread
geom_abline(intercept = 0,
slope = 1) +
theme_classic() +
labs(x = "Manual Stripchart",
y = "Shaft Encoder"
) +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5,
size = 7),
legend.position = "none")
#interactive plot
ggplotly(p = p2,
tooltip = "text") %>%
layout(title = list(text = paste0(paste0("S2 Daily Max WTE Comparison (", min_year, "-", max_year, ")"),
'<br>',
'<sup>',
paste0("n = ", s2_n),
'</sup>')),
font = list(size = 12))
summary()s2_diff <- s2_join_sub %>%
mutate(diff = shaft_encoder - stripchart)
summary(s2_diff$diff)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.018694 -0.002111 0.001167 0.001101 0.004255 0.046007
s2_sd <- round(sd(s2_diff$diff), digits = 5)
The standard deviation of the differences between stripchart and shaft encoder values is approximately 0.00526.
#set min and max year
min_year = min(s2_diff$year)
max_year = max(s2_diff$year)
#calculate the number of samples
s2_n <- nrow(s2_diff)
#plot
ggplot(data = s2_diff) +
geom_histogram(aes(x = diff),
binwidth = 0.002) +
theme_classic() +
labs(x = "Differences Between Shaft Encoders - Stripcharts (m)",
y = "Frequency",
title = paste0("S2 Daily Max WTE Differences (", min_year, "-", max_year, ")"),
caption = paste0("n = ", s2_n)
) +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
The S3 site was omitted due to unreliable data.
#format stripchart data
bog4 <- bogwell_clean_s4 %>%
#remove unnecessary columns
select(-peatland, -flag) %>%
#create a column to note what type of data this is
mutate(type = "stripchart")
#format shaft encoder data
se4 <- s4_2020_clean %>%
#extract date and year
mutate(date = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S',
tz = "GMT"),
format = '%Y-%m-%d'),
date = as.POSIXct(date, tz = "GMT"),
year = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S',
tz = "GMT"),
format = '%Y'),
year = as.numeric(year),
#note data type
type = "shaft_encoder") %>%
#remove unnecessary columns
select(-datetime, -max_wt_elev_ft) %>%
#rename columns to join the data
rename(wte = max_wt_elev_m)
#join the data
s4_join <- full_join(x = bog4,
y = se4,
by = c("wte", "date", "year", "type"))
#subset the join to the time period where the two data sources overlap
s4_join_sub <- s4_join %>%
pivot_wider(names_from = "type",
values_from = "wte") %>%
#remove rows with NA values
filter(!is.na(shaft_encoder),
!is.na(stripchart))
#set min and max year
min_year = min(s4_join_sub$year)
max_year = max(s4_join_sub$year)
#calculate the number of samples
s4_n <- nrow(s4_join_sub)
p4 <- ggplot(data = s4_join_sub) +
geom_point(aes(x = stripchart,
y = shaft_encoder,
alpha = 0.1,
group = 1,
text = paste0("Date: ", date, "\n",
"Stripchart: ", stripchart, "\n",
"Shaft Encoder: ", round(shaft_encoder, digits = 3)))) +
#plot straight line to compare point spread
geom_abline(intercept = 0,
slope = 1) +
theme_classic() +
labs(x = "Manual Stripchart",
y = "Shaft Encoder"
) +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5,
size = 7),
legend.position = "none")
#interactive plot
ggplotly(p = p4,
tooltip = "text") %>%
layout(title = list(text = paste0(paste0("S4 Daily Max WTE Comparison (", min_year, "-", max_year, ")"),
'<br>',
'<sup>',
paste0("n = ", s4_n),
'</sup>')),
font = list(size = 12)
)
summary()s4_diff <- s4_join_sub %>%
mutate(diff = shaft_encoder - stripchart)
summary(s4_diff$diff)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.0200000 0.0000000 0.0000000 -0.0002602 0.0000000 0.0200000
s4_sd <- round(sd(s4_diff$diff), digits = 5)
The standard deviation of the differences between stripchart and shaft encoder values is approximately 0.00607.
#set min and max year
min_year = min(s4_diff$year)
max_year = max(s4_diff$year)
#calculate the number of samples
s4_n <- nrow(s4_diff)
#plot
ggplot(data = s4_diff) +
geom_histogram(aes(x = diff),
binwidth = 0.01) +
theme_classic() +
labs(x = "Differences Between Shaft Encoders - Stripcharts (m)",
y = "Frequency",
title = paste0("S4 Daily Max WTE Differences (", min_year, "-", max_year, ")"),
caption = paste0("n = ", s4_n)
) +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
#format stripchart data
bog5 <- bogwell_clean_s5 %>%
#remove unnecessary columns
select(-peatland, -flag) %>%
#create a column to note what type of data this is
mutate(type = "stripchart")
#format shaft encoder data
se5 <- s5_all %>%
#extract date and year
mutate(date = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S',
tz = "GMT"),
format = '%Y-%m-%d'),
date = as.POSIXct(date, tz = "GMT"),
year = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S',
tz = "GMT"),
format = '%Y'),
year = as.numeric(year),
#note data type
type = "shaft_encoder") %>%
#remove unnecessary columns
select(-datetime, -max_wt_elev_ft) %>%
#rename columns to join the data
rename(wte = max_wt_elev_m)
#join the data
s5_join <- full_join(x = bog5,
y = se5,
by = c("wte", "date", "year", "type"))
#subset the join to the time period where the two data sources overlap
s5_join_sub <- s5_join %>%
pivot_wider(names_from = "type",
values_from = "wte") %>%
#remove rows with NA values
filter(!is.na(shaft_encoder),
!is.na(stripchart))
#set min and max year
min_year = min(s5_join_sub$year)
max_year = max(s5_join_sub$year)
#calculate the number of samples
s5_n <- nrow(s5_join_sub)
p5 <- ggplot(data = s5_join_sub) +
geom_point(aes(x = stripchart,
y = shaft_encoder,
alpha = 0.1,
group = 1,
text = paste0("Date: ", date, "\n",
"Stripchart: ", stripchart, "\n",
"Shaft Encoder: ", round(shaft_encoder, digits = 3)))) +
#plot straight line to compare point spread
geom_abline(intercept = 0,
slope = 1) +
theme_classic() +
labs(x = "Manual Stripchart",
y = "Shaft Encoder"
) +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5,
size = 7),
legend.position = "none")
#interactive plot
ggplotly(p = p5,
tooltip = "text") %>%
layout(title = list(text = paste0(paste0("S5 Daily Max WTE Comparison (", min_year, "-", max_year, ")"),
'<br>',
'<sup>',
paste0("n = ", s5_n),
'</sup>')),
font = list(size = 12)
)
summary()s5_diff <- s5_join_sub %>%
mutate(diff = shaft_encoder - stripchart)
summary(s5_diff$diff)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.0200000 0.0000000 0.0000000 0.0003529 0.0000000 0.0200000
s5_sd <- round(sd(s5_diff$diff), digits = 5)
The standard deviation of the differences between stripchart and shaft encoder values is approximately 0.00734.
#set min and max year
min_year = min(s5_diff$year)
max_year = max(s5_diff$year)
#calculate the number of samples
s5_n <- nrow(s5_diff)
#plot
ggplot(data = s5_diff) +
geom_histogram(aes(x = diff),
binwidth = 0.01) +
theme_classic() +
labs(x = "Differences Between Shaft Encoders - Stripcharts (m)",
y = "Frequency",
title = paste0("S5 Daily Max WTE Differences (", min_year, "-", max_year, ")"),
caption = paste0("n = ", s5_n)
) +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
#format stripchart data
bog6 <- bogwell_clean_s6 %>%
#remove unnecessary columns
select(-peatland, -flag) %>%
#create a column to note what type of data this is
mutate(type = "stripchart")
#format shaft encoder data
se6 <- s6_all %>%
#extract date and year
mutate(date = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S',
tz = "GMT"),
format = '%Y-%m-%d'),
date = as.POSIXct(date, tz = "GMT"),
year = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S',
tz = "GMT"),
format = '%Y'),
year = as.numeric(year),
#note data type
type = "shaft_encoder") %>%
#remove unnecessary columns
select(-datetime, -max_wt_elev_ft) %>%
#rename columns to join the data
rename(wte = max_wt_elev_m)
#join the data
s6_join <- full_join(x = bog6,
y = se6,
by = c("wte", "date", "year", "type"))
#subset the join to the time period where the two data sources overlap
s6_join_sub <- s6_join %>%
pivot_wider(names_from = "type",
values_from = "wte") %>%
#unnest the list to turn NULL values to NA values
unnest() %>%
#remove rows with NA values
filter(!is.na(shaft_encoder),
!is.na(stripchart)) %>%
#remove duplicate rows
unique()
#set min and max year
min_year = min(s6_join_sub$year)
max_year = max(s6_join_sub$year)
#calculate the number of samples
s6_n <- nrow(s6_join_sub)
p6 <- ggplot(data = s6_join_sub) +
geom_point(aes(x = stripchart,
y = shaft_encoder,
alpha = 0.1,
group = 1,
text = paste0("Date: ", date, "\n",
"Stripchart: ", stripchart, "\n",
"Shaft Encoder: ", round(shaft_encoder, digits = 3)))) +
#plot straight line to compare point spread
geom_abline(intercept = 0,
slope = 1) +
theme_classic() +
labs(x = "Manual Stripchart",
y = "Shaft Encoder"
) +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5,
size = 7),
legend.position = "none")
#interactive plot
ggplotly(p = p6,
tooltip = "text") %>%
layout(title = list(text = paste0(paste0("S6 Daily Max WTE Comparison (", min_year, "-", max_year, ")"),
'<br>',
'<sup>',
paste0("n = ", s6_n),
'</sup>')),
font = list(size = 12)
)
summary()s6_diff <- s6_join_sub %>%
mutate(diff = shaft_encoder - stripchart)
summary(s6_diff$diff)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.080000 -0.010000 0.000000 -0.001182 0.010000 0.030000
s6_sd <- round(sd(s6_diff$diff), digits = 5)
The standard deviation of the differences between stripchart and shaft encoder values is approximately 0.01252.
#set min and max year
min_year = min(s6_diff$year)
max_year = max(s6_diff$year)
#calculate the number of samples
s6_n <- nrow(s6_diff)
#plot
ggplot(data = s6_diff) +
geom_histogram(aes(x = diff),
binwidth = 0.01) +
theme_classic() +
labs(x = "Differences Between Shaft Encoders - Stripcharts (m)",
y = "Frequency",
title = paste0("S6 Daily Max WTE Differences (", min_year, "-", max_year, ")"),
caption = paste0("n = ", s6_n)
) +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
Format the data
#format stripchart data
boglk <- bogwell_clean_boglk %>%
#remove unnecessary columns
select(-peatland, -flag) %>%
#create a column to note what type of data this is
mutate(type = "stripchart") %>%
unique()
#format shaft encoder data
se_boglk <- boglk_all %>%
#extract date and year
mutate(date = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S',
tz = "GMT"),
format = '%Y-%m-%d'),
date = as.POSIXct(date, tz = "GMT"),
year = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S',
tz = "GMT"),
format = '%Y'),
year = as.numeric(year),
#note data type
type = "shaft_encoder") %>%
#remove unnecessary columns
select(-datetime, -max_wt_elev_ft) %>%
#rename columns to join the data
rename(wte = max_wt_elev_m) %>%
#calculate daily max WTE
group_by(date) %>%
summarise(max_wte = max(wte),
date = date,
year = year,
type = "shaft_encoder"
) %>%
#remove duplicate observations
unique()
#join the data
boglk_join <- full_join(x = boglk,
y = se_boglk,
by = c("wte" = "max_wte", "date", "year", "type"))
#subset the join to the time period where the two data sources overlap
boglk_join_sub <- boglk_join %>%
pivot_wider(names_from = "type",
values_from = "wte") %>%
unnest() %>%
#remove rows with NA values
filter(!is.na(shaft_encoder),
!is.na(stripchart)) %>%
unique()
#set min and max year
min_year = min(boglk_join_sub$year)
max_year = max(boglk_join_sub$year)
#calculate the number of samples
boglk_n <- nrow(boglk_join_sub)
p_boglk <- ggplot(data = boglk_join_sub) +
geom_point(aes(x = stripchart,
y = shaft_encoder,
alpha = 0.1,
group = 1,
text = paste0("Date: ", date, "\n",
"Stripchart: ", stripchart, "\n",
"Shaft Encoder: ", round(shaft_encoder, digits = 3)))) +
#plot straight line to compare point spread
geom_abline(intercept = 0,
slope = 1) +
theme_classic() +
labs(x = "Manual Stripchart",
y = "Shaft Encoder"
) +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5,
size = 7),
legend.position = "none")
#interactive plot
ggplotly(p = p_boglk,
tooltip = "text") %>%
layout(title = list(text = paste0(paste0("Bog Lake Fen (BOGLK) \n Daily Max WTE Comparison (", min_year, "-", max_year, ")"),
'<br>',
'<sup>',
paste0("n = ", boglk_n),
'</sup>')),
font = list(size = 12)
)
summary()boglk_diff <- boglk_join_sub %>%
mutate(diff = shaft_encoder - stripchart)
summary(boglk_diff$diff)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.084811 -0.035978 -0.016262 -0.019127 -0.003954 0.068180
boglk_sd <- round(sd(boglk_diff$diff), digits = 5)
The standard deviation of the differences between stripchart and shaft encoder values is approximately 0.02444.
#set min and max year
min_year = min(boglk_diff$year)
max_year = max(boglk_diff$year)
#calculate the number of samples
boglk_n <- nrow(boglk_diff)
#plot
ggplot(data = boglk_diff) +
geom_histogram(aes(x = diff),
binwidth = 0.005) +
theme_classic() +
labs(x = "Differences Between Shaft Encoders - Stripcharts (m)",
y = "Frequency",
title = paste0("Bog Lake Fen (BOGLK) \n Daily Max WTE Differences (", min_year, "-", max_year, ")"),
caption = paste0("n = ", boglk_n)
) +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
Use ggpairs() to create correlation matrices to
compare shaft encoder WTE data from each site
Use geom_smooth() to plot a linear regression line
for each panel
#format the data
s1 <- s1_join_sub %>%
mutate(site = "s1")
s2 <- s2_join_sub %>%
mutate(site = "s2")
s4 <- s4_join_sub %>%
mutate(site = "s4")
s5 <- s5_join_sub %>%
mutate(site = "s5")
s6 <- s6_join_sub %>%
mutate(site = "s6")
b <- boglk_join_sub %>%
mutate(site = "boglk")
cor_all <- rbind(s1, s2, s4, s5, s6, b) %>%
select(-stripchart)
cor_all_wide <- cor_all %>%
pivot_wider(names_from = "site",
values_from = "shaft_encoder")
my_fn <- function(data, mapping, ...){
p <- ggplot(data = data, mapping = mapping) +
geom_point() +
#geom_smooth(method=loess, fill="red", color="red", ...) +
geom_smooth(method=lm, fill="blue", color="blue", ...)
p
}
#plot the correlation matrix
ggpairs(data = cor_all_wide,
columns = c("s1", "s2", "s4", "s5", "s6", "boglk"),
title = "Shaft Encoder Correlations by Site",
xlab = "WTE (m)",
ylab = "WTE (m)",
#relabel the columns
columnLabels = c("S1", "S2", "S4", "S5", "S6", "BOGLK"),
#select the linear regression method
lower = list(continuous = my_fn),
#opacity
ggplot2::aes(alpha = 0.01)) +
theme_minimal() +
#adjust title aesthetics
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5,
size = 7)
)
Run this code chunk to knit to HTML without using the knit button in RStudio
index.html to render GitHub PageNote that this .Rmd file is being knit as index.html
into the public bogwell
GitHub repository in order to update the GitHub pages website,
where the plots can be viewed online.